/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
*/

using System;
using System.Text;

namespace Lucene.Net.Analysis.Fr
{
    
/*
 * A stemmer for French words. 
 * <p>
 * The algorithm is based on the work of
 * Dr Martin Porter on his snowball project<br>
 * refer to http://snowball.sourceforge.net/french/stemmer.html<br>
 * (French stemming algorithm) for details
 * </p>
 */

public class FrenchStemmer {

    /*
     * Working buffer holding the term while it is being stemmed.
     * Not readonly: Stem() re-assigns it from the result of TreatVowels().
     */
    private StringBuilder sb = new StringBuilder();

    /*
     * Scratch buffer that SetStrings() fills with R1 so that R2 can be derived from it.
     */
    private readonly StringBuilder tb = new StringBuilder();

    /*
     * Region R0: the whole content of the working buffer.
     */
    private string R0;

    /*
     * Region RV
     * "If the word begins with two vowels, RV is the region after the third letter,
     * otherwise the region after the first vowel not at the beginning of the word,
     * or the end of the word if these positions cannot be found."
     * Null when RetrieveRV() finds no such region.
     */
    private string RV;

    /*
     * Region R1
     * "R1 is the region after the first non-vowel following a vowel
     * or is the null region at the end of the word if there is no such non-vowel"
     */
    private string R1;

    /*
     * Region R2
     * "R2 is the region after the first non-vowel in R1 following a vowel
     * or is the null region at the end of the word if there is no such non-vowel"
     */
    private string R2;


    /*
     * Set by Step1() and Stem() to signal that step 2 still has to be considered.
     */
    private bool suite;

    /*
     * True once one of the suffix helpers has changed the working buffer.
     */
    private bool modified;


    /*
     * Stems the given term to a unique discriminator.
     *
     * A term rejected by IsStemmable() is returned unchanged. Otherwise the term is
     * lower-cased, loaded into the working buffer, its vowels are marked by
     * TreatVowels(), the regions are computed and steps 1 to 6 are applied in order.
     *
     * @param term  the term that should be stemmed
     * @return the content of the working buffer after all steps, or the original
     *         term when it is not stemmable
     */
    protected internal String Stem( String term ) {
        if ( !IsStemmable( term ) ) {
            return term;
        }

        // Lower-case with the invariant culture: String.ToLower() follows the current
        // thread culture (e.g. tr-TR maps 'I' to dotless 'ı'), which would make the
        // stem of the same term depend on the machine's regional settings.
        term = term.ToLowerInvariant();

        // Reset the working buffer and load the term into it.
        sb.Length = 0;
        sb.Append( term );

        // Reset the per-term flags.
        modified = false;
        suite = false;

        sb = TreatVowels( sb );

        SetStrings();

        Step1();

        // Step 2 runs when step 1 changed nothing, or when step 1 asked for it.
        if (!modified || suite)
        {
            if (RV != null)
            {
                suite = Step2A();
                if (!suite)
                {
                    Step2B();
                }
            }
        }

        // Step 3 follows a modification, step 4 is the alternative.
        if (modified || suite)
        {
            Step3();
        }
        else
        {
            Step4();
        }

        Step5();

        Step6();

        return sb.ToString();
    }

    /*
     * Recomputes the region strings R0, RV, R1 and R2 from the working buffer.
     * It has to be called each time the buffer was modified.
     */
    private void SetStrings() {
        R0 = sb.ToString();
        RV = RetrieveRV( sb );
        R1 = RetrieveR( sb );

        // Without R1 there is nothing R2 could be derived from.
        if ( R1 == null )
        {
            R2 = null;
            return;
        }

        // R2 is obtained by applying the R rule to R1, via the scratch buffer.
        tb.Length = 0;
        tb.Append( R1 );
        R2 = RetrieveR( tb );
    }

    /*
     * First step of the Porter Algorithm: standard suffix removal.
     * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
     *
     * The rules are applied strictly in the order written. Each helper reads the
     * region fields at the moment it is called, so a rule sees the regions as
     * refreshed by any earlier rule that changed the buffer.
     */
    private void Step1( ) {
        // Suffix groups that are used by several rules below.
        string[] ementForms = { "ements", "ement" };
        string[] iteForms = { "it\u00e9s", "it\u00e9" };
        string[] ifForms = { "ifs", "ives", "if", "ive" };

        DeleteFrom( R2, new string[] { "ances", "iqUes", "ismes", "ables", "istes", "ance", "iqUe", "isme", "able", "iste" } );

        ReplaceFrom( R2, new string[] { "logies", "logie" }, "log" );
        ReplaceFrom( R2, new string[] { "usions", "utions", "usion", "ution" }, "u" );
        ReplaceFrom( R2, new string[] { "ences", "ence" }, "ent" );

        DeleteButSuffixFromElseReplace( R2, new string[] { "atrices", "ateurs", "ations", "atrice", "ateur", "ation" }, "ic", true, R0, "iqU" );

        DeleteButSuffixFromElseReplace( R2, ementForms, "eus", false, R0, "eux" );
        DeleteButSuffixFrom( R2, ementForms, "ativ", false );
        DeleteButSuffixFrom( R2, ementForms, "iv", false );
        DeleteButSuffixFrom( R2, ementForms, "abl", false );
        DeleteButSuffixFrom( R2, ementForms, "iqU", false );

        DeleteFromIfTestVowelBeforeIn( R1, new string[] { "issements", "issement" }, false, R0 );
        DeleteFrom( RV, ementForms );

        DeleteButSuffixFromElseReplace( R2, iteForms, "abil", false, R0, "abl" );
        DeleteButSuffixFromElseReplace( R2, iteForms, "ic", false, R0, "iqU" );
        DeleteButSuffixFrom( R2, iteForms, "iv", true );

        DeleteButSuffixFromElseReplace( R2, ifForms, "icat", false, R0, "iqU" );
        DeleteButSuffixFromElseReplace( R2, ifForms, "at", true, R2, "iqU" );

        ReplaceFrom( R0, new string[] { "eaux" }, "eau" );

        ReplaceFrom( R1, new string[] { "aux" }, "al" );

        DeleteButSuffixFromElseReplace( R2, new string[] { "euses", "euse" }, "", true, R1, "eux" );

        DeleteFrom( R2, new string[] { "eux" } );

        // If any of the next three rules fires, step 2a has to be performed as well.
        // All three are always attempted, whatever the outcome of the previous one.
        if ( ReplaceFrom( RV, new string[] { "amment" }, "ant" ) )
        {
            suite = true;
        }
        if ( ReplaceFrom( RV, new string[] { "emment" }, "ent" ) )
        {
            suite = true;
        }
        if ( DeleteFromIfTestVowelBeforeIn( RV, new string[] { "ments", "ment" }, true, RV ) )
        {
            suite = true;
        }
    }

    /*
     * Second step (A) of the Porter Algorithm: verb suffixes beginning with "i".
     * Stem() performs it when step 1 changed nothing, or when step 1 handled one of
     * the amment, emment, ments or ment suffixes.
     * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
     *
     * The first listed suffix that RV ends with is removed, provided the character
     * in front of it is not a vowel.
     *
     * @return bool - true if something changed in the StringBuilder
     */
    private bool Step2A() {
        string[] iVerbEndings = {
            "\u00eemes", "\u00eetes", "iraIent", "irait", "irais", "irai", "iras", "ira",
            "irent", "iriez", "irez", "irions", "irons", "iront",
            "issaIent", "issais", "issantes", "issante", "issants", "issant",
            "issait", "issais", "issions", "issons", "issiez", "issez", "issent",
            "isses", "isse", "ir", "is", "\u00eet", "it", "ies", "ie", "i"
        };
        return DeleteFromIfTestVowelBeforeIn( RV, iVerbEndings, false, RV );
    }

    /*
     * Second step (B) of the Porter Algorithm: the remaining verb suffixes.
     * Stem() performs it when step 2 A did not change the buffer.
     * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
     */
    private void Step2B() {
        // Endings that are simply removed from RV.
        string[] plainEndings = {
            "eraIent", "erais", "erait", "erai", "eras", "erions", "eriez",
            "erons", "eront","erez", "\u00e8rent", "era", "\u00e9es", "iez",
            "\u00e9e", "\u00e9s", "er", "ez", "\u00e9"
        };
        DeleteFrom( RV, plainEndings );

        // Endings that are removed from RV together with a preceding "e" when there is one.
        string[] aEndings = {
            "assions", "assiez", "assent", "asses", "asse", "aIent",
            "antes", "aIent", "Aient", "ante", "\u00e2mes", "\u00e2tes", "ants", "ant",
            "ait", "a\u00eet", "ais", "Ait", "A\u00eet", "Ais", "\u00e2t", "as", "ai", "Ai", "a"
        };
        DeleteButSuffixFrom( RV, aEndings, "e", true );

        DeleteFrom( R2, new string[] { "ions" } );
    }

    /*
     * Third step of the Porter Algorithm: a final 'Y' becomes 'i', a final 'ç' becomes 'c'.
     * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
     */
    private void Step3() {
        int last = sb.Length - 1;
        if (last < 0)
        {
            return;
        }

        switch (sb[last])
        {
            case 'Y':
                sb[last] = 'i';
                SetStrings();
                break;
            case 'ç':
                sb[last] = 'c';
                SetStrings();
                break;
        }
    }

    /*
     * Fourth step of the Porter Algorithm: residual suffixes.
     * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
     */
    private void Step4() {
        int len = sb.Length;

        // Drop a final 's' unless the letter in front of it is a, i, o, u, è or s.
        if (len > 1 && sb[len - 1] == 's')
        {
            char before = sb[len - 2];
            bool keepsFinalS = before == 'a' || before == 'i' || before == 'o'
                            || before == 'u' || before == 'è' || before == 's';
            if (!keepsFinalS)
            {
                sb.Length = len - 1;
                SetStrings();
            }
        }

        // Remove "ion" in R2 when RV ends with "sion"; otherwise try "tion".
        if (!DeleteFromIfPrecededIn( R2, new string[] { "ion" }, RV, "s" ))
        {
            DeleteFromIfPrecededIn( R2, new string[] { "ion" }, RV, "t" );
        }

        ReplaceFrom( RV, new string[] { "I\u00e8re", "i\u00e8re", "Ier", "ier" }, "i" );
        DeleteFrom( RV, new string[] { "e" } );
        DeleteFromIfPrecededIn( RV, new string[] { "\u00eb" }, R0, "gu" );
    }

    /*
     * Fifth step of the Porter Algorithm: un-doubling.
     * When the word ends with enn, onn, ett, ell or eill, its last letter is removed.
     * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
     */
    private void Step5() {
        if (R0 == null)
        {
            return;
        }

        // Ordinal comparison: the single-argument String.EndsWith(string) is
        // culture-sensitive, so its answer could depend on the current thread culture.
        string[] doubledEndings = { "enn", "onn", "ett", "ell", "eill" };
        foreach (string ending in doubledEndings)
        {
            if (R0.EndsWith( ending, StringComparison.Ordinal ))
            {
                sb.Length = sb.Length - 1;
                SetStrings();
                return;
            }
        }
    }

    /*
     * Sixth (and last!) step of the Porter Algorithm: un-accenting.
     * Walking back from the end of the word over its trailing non-vowels, the first
     * vowel met is replaced by 'e' when it is 'é' or 'è' and at least one non-vowel
     * follows it. Nothing happens in every other case.
     * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
     */
    private void Step6() {
        if (R0 == null || R0.Length == 0)
        {
            return;
        }

        // Skip the run of non-vowels at the end of the word.
        int lastIndex = R0.Length - 1;
        int idx = lastIndex;
        while (idx > -1 && !IsVowel( R0[idx] ))
        {
            idx--;
        }

        // idx is now the last vowel of the word, or -1 when the word has none.
        bool followedByNonVowel = idx < lastIndex;
        if (idx > -1 && followedByNonVowel && (R0[idx] == 'é' || R0[idx] == 'è'))
        {
            sb[idx] = 'e';
        }
    }

    /*
     * Delete a suffix searched in zone "source" if zone "from" ends with prefix + suffix.
     * Only the suffix itself is removed from the buffer; the prefix stays.
     * Unlike the other suffix helpers, this one does not set the "modified" flag.
     *
     * @param source the primary source zone for search (may be null: nothing is done)
     * @param search the suffixes to look for, tried in order; the first hit wins
     * @param from the secondary source zone that must end with prefix + suffix
     *             (may be null: nothing is done)
     * @param prefix the prefix to add to the search string to test
     * @return bool - true if the buffer was modified
     */
    private bool DeleteFromIfPrecededIn( String source, String[] search, String from, String prefix ) {
        if (source == null || from == null)
        {
            return false;
        }

        foreach (string suffix in search)
        {
            // Ordinal comparison: a culture-sensitive EndsWith may report a match whose
            // character count differs from suffix.Length, which would make the
            // truncation below cut the wrong number of characters.
            if (source.EndsWith( suffix, StringComparison.Ordinal )
                && from.EndsWith( prefix + suffix, StringComparison.Ordinal ))
            {
                sb.Length = sb.Length - suffix.Length;
                SetStrings();
                return true;
            }
        }
        return false;
    }

    /*
     * Delete a suffix searched in zone "source" if the letter preceding it in the
     * buffer is (or isn't) a vowel.
     *
     * @param source the primary source zone for search (may be null: nothing is done)
     * @param search the suffixes to look for, tried in order; a suffix that matches
     *               but fails the vowel test does not stop the search
     * @param vowel bool - true if we need a vowel before the search string,
     *              false if we need a non-vowel
     * @param from the secondary source zone, which has to be long enough to hold the
     *             suffix plus the tested letter (may be null: nothing is done)
     * @return bool - true if the buffer was modified
     */
    private bool DeleteFromIfTestVowelBeforeIn( String source, String[] search, bool vowel, String from ) {
        if (source == null || from == null)
        {
            return false;
        }

        foreach (string suffix in search)
        {
            // Ordinal comparison: a culture-sensitive EndsWith may report a match whose
            // character count differs from suffix.Length, which would break the index
            // arithmetic and the truncation below.
            if (!source.EndsWith( suffix, StringComparison.Ordinal ))
            {
                continue;
            }

            // The tested letter has to lie inside "from".
            if (suffix.Length + 1 > from.Length)
            {
                continue;
            }

            bool precededByVowel = IsVowel( sb[sb.Length - (suffix.Length + 1)] );
            if (precededByVowel == vowel)
            {
                sb.Length = sb.Length - suffix.Length;
                modified = true;
                SetStrings();
                return true;
            }
        }
        return false;
    }

    /*
     * Delete a suffix searched in zone "source" together with the prefix preceding it.
     * When "without" is true, a suffix found without the prefix is deleted as well.
     * The suffixes are tried in order and the first one that applies ends the search.
     *
     * @param source the primary source zone for search (may be null: nothing is done)
     * @param search the suffixes to look for
     * @param prefix the prefix to add to the search string to test
     * @param without bool - true if it will be deleted even without prefix found
     */
    private void DeleteButSuffixFrom( String source, String[] search, String prefix, bool without ) {
        if (source == null)
        {
            return;
        }

        foreach (string suffix in search)
        {
            // Ordinal comparisons: a culture-sensitive EndsWith may report a match whose
            // character count differs from the pattern length, which would make the
            // truncation below cut the wrong number of characters.
            int cut;
            if (source.EndsWith( prefix + suffix, StringComparison.Ordinal ))
            {
                cut = prefix.Length + suffix.Length;
            }
            else if (without && source.EndsWith( suffix, StringComparison.Ordinal ))
            {
                cut = suffix.Length;
            }
            else
            {
                continue;
            }

            sb.Length = sb.Length - cut;
            modified = true;
            SetStrings();
            return;
        }
    }

    /*
     * For the first suffix in "search" that applies, does one of the following,
     * tested in this order:
     *   1. zone "source" ends with prefix + suffix: delete prefix and suffix;
     *   2. zone "from" ends with prefix + suffix: replace prefix and suffix with "replace";
     *   3. "without" is true and "source" ends with the suffix: delete the suffix only.
     *
     * @param source the primary source zone for search (may be null: nothing is done)
     * @param search the suffixes to look for, tried in order
     * @param prefix the prefix to add to the search string to test
     * @param without bool - true if it will be deleted even without prefix found
     * @param from the secondary source zone used for the replacement case (may be null)
     * @param replace the text appended in place of prefix + suffix in the replacement case
     */
    private void DeleteButSuffixFromElseReplace( String source, String[] search, String prefix, bool without, String from, String replace ) {
        if (source == null)
        {
            return;
        }

        foreach (string suffix in search)
        {
            string prefixed = prefix + suffix;

            // Ordinal comparisons: a culture-sensitive EndsWith may report a match whose
            // character count differs from the pattern length, which would make the
            // truncations below cut the wrong number of characters.
            if (source.EndsWith( prefixed, StringComparison.Ordinal ))
            {
                sb.Length = sb.Length - prefixed.Length;
            }
            else if (from != null && from.EndsWith( prefixed, StringComparison.Ordinal ))
            {
                // Replace: cut prefix + suffix, then append the replacement.
                sb.Length = sb.Length - prefixed.Length;
                sb.Append( replace );
            }
            else if (without && source.EndsWith( suffix, StringComparison.Ordinal ))
            {
                sb.Length = sb.Length - suffix.Length;
            }
            else
            {
                continue;
            }

            modified = true;
            SetStrings();
            return;
        }
    }

    /*
     * Replace a search string with another within the source zone.
     * The suffixes are tried in order; the first one the zone ends with is cut from
     * the buffer and the replacement is appended.
     *
     * @param source the source zone for search (may be null: nothing is done)
     * @param search the strings to search for replacement
     * @param replace the replacement string
     * @return bool - true if a replacement was made
     */
    private bool ReplaceFrom( String source, String[] search, String replace ) {
        if (source == null)
        {
            return false;
        }

        foreach (string suffix in search)
        {
            // Ordinal comparison: a culture-sensitive EndsWith may report a match whose
            // character count differs from suffix.Length, which would make the
            // truncation below cut the wrong number of characters.
            if (source.EndsWith( suffix, StringComparison.Ordinal ))
            {
                sb.Length = sb.Length - suffix.Length;
                sb.Append( replace );

                modified = true;
                SetStrings();
                return true;
            }
        }
        return false;
    }

    /*
     * Delete a search string within the source zone.
     * The suffixes are tried in order; the first one the zone ends with is cut from
     * the buffer.
     *
     * @param source the source zone for search (may be null: nothing is done)
     * @param suffix the strings to search for suppression
     */
    private void DeleteFrom(String source, String[] suffix ) {
        if (source == null)
        {
            return;
        }

        foreach (string candidate in suffix)
        {
            // Ordinal comparison: a culture-sensitive EndsWith may report a match whose
            // character count differs from candidate.Length, which would make the
            // truncation below cut the wrong number of characters.
            if (source.EndsWith( candidate, StringComparison.Ordinal ))
            {
                sb.Length = sb.Length - candidate.Length;
                modified = true;
                SetStrings();
                return;
            }
        }
    }

    /*
     * Test if a char is a french vowel, including accentuated ones.
     * Only the lower-case letters listed below count; every other char,
     * upper-case letters included, is reported as a non-vowel.
     *
     * @param ch the char to test
     * @return bool - true if the char is a vowel
     */
    private bool IsVowel(char ch) {
        const string vowels = "aeiouyâàëéêèïîôüùû";
        return vowels.IndexOf( ch ) >= 0;
    }

    /*
     * Retrieve the "R zone" (1 or 2 depending on the buffer) and return the corresponding string.
     * "R is the region after the first non-vowel following a vowel
     * or is the null region at the end of the word if there is no such non-vowel"
     *
     * @param buffer the in buffer
     * @return the text after that non-vowel, or null when the buffer has no vowel,
     *         no non-vowel after its first vowel, or nothing after that non-vowel
     */
    private String RetrieveR( StringBuilder buffer ) {
        int len = buffer.Length;

        // Advance to the first vowel.
        int idx = 0;
        while (idx < len && !IsVowel( buffer[idx] ))
        {
            idx++;
        }
        if (idx == len)
        {
            return null;
        }

        // Advance to the first non-vowel after it.
        while (idx < len && IsVowel( buffer[idx] ))
        {
            idx++;
        }

        // The region starts right after that non-vowel and must not be empty.
        int start = idx + 1;
        if (start < len)
        {
            return buffer.ToString( start, len - start );
        }
        return null;
    }

    /*
     * Retrieve the "RV zone" from a buffer and return the corresponding string.
     * "If the word begins with two vowels, RV is the region after the third letter,
     * otherwise the region after the first vowel not at the beginning of the word,
     * or the end of the word if these positions cannot be found."
     *
     * As written, buffers of three chars or fewer yield null, and when no vowel is
     * found after the first char the region starts at the second char.
     *
     * @param buffer the in buffer
     * @return the resulting string, or null when there is no region
     */
    private String RetrieveRV( StringBuilder buffer ) {
        int len = buffer.Length;
        if (len <= 3)
        {
            return null;
        }

        // Word begins with two vowels: RV is everything after the third letter.
        if (IsVowel( buffer[0] ) && IsVowel( buffer[1] ))
        {
            return buffer.ToString( 3, len - 3 );
        }

        // Otherwise look for the first vowel that is not at the beginning of the word.
        int vowelAt = 0;
        for (int idx = 1; idx < len; idx++)
        {
            if (IsVowel( buffer[idx] ))
            {
                vowelAt = idx;
                break;
            }
        }

        int start = vowelAt + 1;
        return start < len ? buffer.ToString( start, len - start ) : null;
    }



    /*
     * Marks the letters that the later steps must not treat as vowels by putting
     * them in upper case. The buffer is changed in place, from left to right, so a
     * letter already marked is seen in its upper-case form by the tests that follow.
     *
     * Turns u and i preceded AND followed by a vowel to UpperCase
     * Turns y preceded OR followed by a vowel to UpperCase
     * Turns u preceded by q to UpperCase
     *
     * @param buffer the buffer to treat
     * @return the same buffer instance, after treatment
     */
    private StringBuilder TreatVowels( StringBuilder buffer ) {
        for ( int idx = 0; idx < buffer.Length; idx++ ) {
            char ch = buffer[idx];

            // First char: only a 'y' followed by a vowel is marked.
            if (idx == 0)
            {
                if (buffer.Length > 1 && ch == 'y' && IsVowel( buffer[1] ))
                {
                    buffer[0] = 'Y';
                }
                continue;
            }

            // Last char: there is no following letter to look at.
            if (idx == buffer.Length - 1)
            {
                if (ch == 'u' && buffer[idx - 1] == 'q')
                {
                    buffer[idx] = 'U';
                }
                if (ch == 'y' && IsVowel( buffer[idx - 1] ))
                {
                    buffer[idx] = 'Y';
                }
                continue;
            }

            // Inner chars: both neighbours are available.
            switch (ch)
            {
                case 'u':
                    if (buffer[idx - 1] == 'q'
                        || (IsVowel( buffer[idx - 1] ) && IsVowel( buffer[idx + 1] )))
                    {
                        buffer[idx] = 'U';
                    }
                    break;
                case 'i':
                    if (IsVowel( buffer[idx - 1] ) && IsVowel( buffer[idx + 1] ))
                    {
                        buffer[idx] = 'I';
                    }
                    break;
                case 'y':
                    if (IsVowel( buffer[idx - 1] ) || IsVowel( buffer[idx + 1] ))
                    {
                        buffer[idx] = 'Y';
                    }
                    break;
            }
        }

        return buffer;
    }

    /*
     * Checks a term if it can be processed correctly.
     *
     * A term is accepted when every char is a letter and the only upper-case
     * letter it contains, if any, is its first char. The empty term is accepted.
     *
     * @param term the term to check
     * @return bool - true if the term can be stemmed
     */
    private bool IsStemmable( String term ) {
        bool sawUpper = false;
        for ( int idx = 0; idx < term.Length; idx++ ) {
            char ch = term[idx];

            // Discard terms that contain non-letter chars.
            if ( !char.IsLetter( ch ) ) {
                return false;
            }

            if ( !char.IsUpper( ch ) ) {
                continue;
            }

            // An upper-case letter is tolerated only once, and only at the
            // start of the term.
            if ( sawUpper || idx > 0 ) {
                return false;
            }
            sawUpper = true;
        }
        return true;
    }
}

}
